no 2000

library(dtwclust)
## Loading required package: proxy
## 
## Attaching package: 'proxy'
## The following objects are masked from 'package:stats':
## 
##     as.dist, dist
## The following object is masked from 'package:base':
## 
##     as.matrix
## Loading required package: dtw
## Loaded dtw v1.23-1. See ?dtw for help, citation("dtw") for use in publication.
## dtwclust:
## Setting random number generator to L'Ecuyer-CMRG (see RNGkind()).
## To read the included vignettes type: browseVignettes("dtwclust").
## See news(package = "dtwclust") after package updates.
library(dtw)
df<-read.csv("../clean_data/score_list_year.csv")
df
df <- subset(df, select = -c(X) )
df
#colnames(df)[colnames(df) == "X2000"] = "2000"
colnames(df)[colnames(df) == "X2003"] = "2003"
colnames(df)[colnames(df) == "X2005"] = "2005"
colnames(df)[colnames(df) == "X2007"] = "2007"
colnames(df)[colnames(df) == "X2009"] = "2009"
colnames(df)[colnames(df) == "X2011"] = "2011"
colnames(df)[colnames(df) == "X2013"] = "2013"
colnames(df)[colnames(df) == "X2015"] = "2015"
colnames(df)[colnames(df) == "X2017"] = "2017"
colnames(df)[colnames(df) == "X2019"] = "2019"
colnames(df)[colnames(df) == "X2022"] = "2022"
df
jurisdiction = df[['Jurisdiction']]
dtw_df <- df[, -1]
head(dtw_df)
df_lst <- tslist(dtw_df)
remove_nan <- function(ts) {
  ts[!is.na(ts)]
}

# Apply the function to each time series in the list
df_lst <- lapply(df_lst, remove_nan)
head(df_lst)
## $`1`
##  [1] 230 230 232 231 233 231 228 229 225 223
## 
## $`2`
##  [1] 226 232 230 236 236 236 237 237 236 233
## 
## $`3`
##  [1] 232 238 234 238 240 235 230 232 230 229
## 
## $`4`
##  [1] 228 233 234 235 240 238 238 238 236 229
## 
## $`5`
##  [1] 230 235 232 232 234 234 232 230 230 227
## 
## $`6`
##  [1] 236 242 241 242 247 244 243 240 239 235

index score evaluation

df_cvi <- list()
for (i in 2:10){
  df_clust <- tsclust(df_lst, type = "partitional", k = i, distance = "dtw_basic", centroid = "pam")
  df_metric <- cvi(df_clust, type = "valid", log.base = 10)
  df_cvi <- append(df_cvi, list(df_metric))
}
df_cvi_ma <- do.call(rbind, df_cvi)
rw <- c("K2","K3","K4","K5","K6","K7","K8","K9","K10")
rownames(df_cvi_ma) <- rw
print(df_cvi_ma)
##           Sil SF       CH        DB    DBstar          D        COP
## K2  0.5327527  0 43.28736 0.9210059 0.9210059 0.08888889 0.23987430
## K3  0.3863705  0 28.30209 0.7932507 0.9258264 0.08888889 0.21514668
## K4  0.1295117  0 22.97979 1.6732991 1.8801323 0.08181818 0.16686699
## K5  0.2361482  0 24.91268 1.0158369 1.1395415 0.13888889 0.14873193
## K6  0.1976977  0 15.29492 1.3165721 1.6292335 0.13513514 0.14320138
## K7  0.2262670  0 16.60267 1.1701042 1.2959175 0.17241379 0.10868303
## K8  0.1497312  0 12.65337 1.5293468 1.8700981 0.22448980 0.11383528
## K9  0.1015162  0 12.06658 1.0970361 1.4288178 0.15873016 0.11301479
## K10 0.1926633  0 11.76796 1.4252631 1.6888127 0.22727273 0.09079714

– “Sil” (!): Silhouette index (Rousseeuw (1987); to be maximized).-K4 – “SF” (~): Score Function (Saitta et al. (2007); to be maximized; see notes). – “CH” (~): Calinski-Harabasz index (Arbelaitz et al. (2013); to be maximized).-k3 – “DB” (?): Davies-Bouldin index (Arbelaitz et al. (2013); to be minimized).k4 – “DBstar” (?): Modified Davies-Bouldin index (DB*) (Kim and Ramakrishna (2005); to be minimized). -k4 – “D” (!): Dunn index (Arbelaitz et al. (2013); to be maximized). k5 – “COP” (!): COP index (Arbelaitz et al. (2013); to be minimized). k9

index score with different seeds

# different seeds

df_cvi2 <- list()
for (i in 1:100){
  df_clust2 <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw_basic", centroid = "pam", seed=i)
  df_metric2 <- cvi(df_clust2, type = "valid", log.base = 10)
  df_cvi2 <- append(df_cvi2, list(df_metric2))
}
df_cvi_ma2 <- do.call(rbind, df_cvi2)
rw2 <- as.character(seq(1, 100))
rownames(df_cvi_ma2) <- rw2
print(df_cvi_ma2)
##           Sil SF       CH        DB    DBstar          D       COP
## 1   0.1874888  0 18.25596 1.3053106 1.8287413 0.08396947 0.1917782
## 2   0.3571977  0 29.17184 1.3977346 1.4811949 0.21428571 0.1446855
## 3   0.3024563  0 26.41041 1.2689437 1.4516487 0.14705882 0.1637939
## 4   0.1812487  0 26.39508 1.6927962 2.0586129 0.10000000 0.1595358
## 5   0.1923599  0 19.30357 1.2095514 1.6587284 0.08396947 0.1921932
## 6   0.2507965  0 19.95531 1.3104726 1.7536605 0.08148148 0.2070441
## 7   0.3744142  0 27.17061 0.9968825 1.0471875 0.14634146 0.1582313
## 8   0.2142639  0 22.92645 1.3688596 1.4739257 0.14864865 0.1638778
## 9   0.2311142  0 23.73130 1.1148819 1.1888411 0.13333333 0.1651468
## 10  0.3765237  0 29.30090 1.0328111 1.0915961 0.20338983 0.1482244
## 11  0.3571977  0 29.17184 1.3977346 1.4811949 0.21428571 0.1446855
## 12  0.2993477  0 26.54974 1.2566468 1.4449901 0.14705882 0.1635625
## 13  0.2830807  0 19.19311 1.3461046 1.8551322 0.08888889 0.2064483
## 14  0.2061031  0 18.53913 1.3828375 1.8650863 0.10714286 0.1775404
## 15  0.1440264  0 20.38992 1.8710270 2.1120158 0.09803922 0.1776847
## 16  0.2061031  0 18.53913 1.3828375 1.8650863 0.10714286 0.1775404
## 17  0.3765237  0 29.30090 1.0328111 1.0915961 0.20338983 0.1482244
## 18  0.2756091  0 20.92445 1.4813641 1.5843049 0.08888889 0.1952426
## 19  0.3765237  0 29.30090 1.0328111 1.0915961 0.20338983 0.1482244
## 20  0.3024563  0 26.41041 1.2689437 1.4516487 0.14705882 0.1637939
## 21  0.1332506  0 15.41930 1.4563920 1.5887485 0.08888889 0.2194493
## 22  0.1975725  0 21.11861 1.3645201 1.5256517 0.13750000 0.1576086
## 23  0.3073942  0 26.64271 0.8748602 0.9951907 0.14705882 0.1671014
## 24  0.2993477  0 26.54974 1.2566468 1.4449901 0.14705882 0.1635625
## 25  0.2556192  0 20.88889 1.8150376 2.0616855 0.08888889 0.1917037
## 26  0.3744142  0 27.17061 0.9968825 1.0471875 0.14634146 0.1582313
## 27  0.2643667  0 19.45326 1.4257934 1.8834036 0.08888889 0.2052309
## 28  0.3024563  0 26.41041 1.2689437 1.4516487 0.14705882 0.1637939
## 29  0.2359621  0 17.80887 1.0671383 1.3250603 0.08928571 0.1846214
## 30  0.2643303  0 23.68913 1.5044179 1.6082708 0.14634146 0.1623024
## 31  0.2198519  0 18.46799 1.3344687 1.8283955 0.08148148 0.2023604
## 32  0.1923599  0 19.30357 1.2095514 1.6587284 0.08396947 0.1921932
## 33  0.2909841  0 22.44752 1.0798717 1.2153745 0.14492754 0.1547814
## 34  0.2611797  0 24.10611 1.5499854 1.5889159 0.10989011 0.1684780
## 35  0.3907959  0 24.86772 1.0765682 1.2718168 0.14634146 0.1685981
## 36  0.2634083  0 22.70326 1.2548293 1.3077921 0.13513514 0.1621190
## 37  0.1680235  0 18.82338 1.2568437 1.7326249 0.08148148 0.2077470
## 38  0.2403821  0 21.53393 1.3588650 1.4736465 0.13513514 0.1677886
## 39  0.2379958  0 23.99054 1.2376829 1.3529575 0.13513514 0.1704521
## 40  0.3744142  0 27.17061 0.9968825 1.0471875 0.14634146 0.1582313
## 41  0.2302802  0 17.49444 1.2092021 1.5823800 0.07633588 0.1926561
## 42  0.2102223  0 18.20317 1.2852829 1.5978954 0.11764706 0.1790028
## 43  0.2710659  0 22.93085 1.6051506 1.6544439 0.14285714 0.1740123
## 44  0.2143036  0 26.28056 1.1814812 1.2643829 0.11842105 0.1468198
## 45  0.3076958  0 24.30530 1.0263258 1.1141560 0.14492754 0.1529607
## 46  0.2284966  0 21.51658 1.7707353 1.8736810 0.14285714 0.1662717
## 47  0.2798071  0 22.39459 1.2381613 1.4589752 0.14492754 0.1512425
## 48  0.1955145  0 28.86993 1.3117774 1.6050695 0.10000000 0.1582702
## 49  0.3073942  0 26.64271 0.8748602 0.9951907 0.14705882 0.1671014
## 50  0.3256208  0 31.14627 0.9078543 0.9346023 0.18055556 0.1547116
## 51  0.2798071  0 22.39459 1.2381613 1.4589752 0.14492754 0.1512425
## 52  0.1941546  0 14.12701 2.5014473 2.8671010 0.08396947 0.2068573
## 53  0.3765237  0 29.30090 1.0328111 1.0915961 0.20338983 0.1482244
## 54  0.2233182  0 26.66568 1.1781803 1.2631824 0.11842105 0.1467950
## 55  0.2993477  0 26.54974 1.2566468 1.4449901 0.14705882 0.1635625
## 56  0.1911848  0 23.52348 1.8284677 1.9798721 0.12195122 0.1617180
## 57  0.2128360  0 22.79792 1.9260579 2.0171403 0.13414634 0.1536275
## 58  0.2168573  0 18.38749 1.4443697 2.0629950 0.08148148 0.1996360
## 59  0.2823329  0 19.36654 1.2757070 1.7666756 0.08888889 0.2064461
## 60  0.3744142  0 27.17061 0.9968825 1.0471875 0.14634146 0.1582313
## 61  0.2993477  0 26.54974 1.2566468 1.4449901 0.14705882 0.1635625
## 62  0.3571977  0 29.17184 1.3977346 1.4811949 0.21428571 0.1446855
## 63  0.1354536  0 14.17592 1.9028523 2.3671551 0.08928571 0.1945729
## 64  0.3073942  0 26.64271 0.8748602 0.9951907 0.14705882 0.1671014
## 65  0.2909841  0 22.44752 1.0798717 1.2153745 0.14492754 0.1547814
## 66  0.3747658  0 30.45188 0.6940726 0.7647676 0.18644068 0.1538688
## 67  0.1900115  0 18.98990 1.1484432 1.1689030 0.13513514 0.1558790
## 68  0.1170078  0 18.68287 1.1717777 1.6333408 0.07633588 0.2021902
## 69  0.3571977  0 29.17184 1.3977346 1.4811949 0.21428571 0.1446855
## 70  0.2253970  0 21.77116 1.6244867 1.7485403 0.10989011 0.1609345
## 71  0.2628665  0 20.31148 1.4623198 1.5763948 0.15873016 0.1568197
## 72  0.2879393  0 20.40829 0.9871044 1.4338029 0.08888889 0.2050737
## 73  0.3612311  0 23.06653 1.3378768 1.4198228 0.14634146 0.1673015
## 74  0.3744142  0 27.17061 0.9968825 1.0471875 0.14634146 0.1582313
## 75  0.2456433  0 22.39671 1.3276921 1.4104741 0.12162162 0.1605648
## 76  0.2078312  0 18.82915 1.6932078 2.2658465 0.08396947 0.1958815
## 77  0.3744142  0 27.17061 0.9968825 1.0471875 0.14634146 0.1582313
## 78  0.2382307  0 20.72286 1.7611842 2.0391447 0.08888889 0.1929157
## 79  0.2094742  0 23.02502 0.9644303 1.0667018 0.12195122 0.1645362
## 80  0.1932313  0 18.46895 1.3260993 1.7516665 0.08928571 0.1809625
## 81  0.2403821  0 21.53393 1.3588650 1.4736465 0.13513514 0.1677886
## 82  0.2118094  0 22.29318 0.9943062 1.1051276 0.11764706 0.1815512
## 83  0.3801553  0 24.84512 1.2020464 1.2857002 0.13414634 0.1644943
## 84  0.3765237  0 29.30090 1.0328111 1.0915961 0.20338983 0.1482244
## 85  0.2904063  0 24.23261 1.1792735 1.3501068 0.14492754 0.1494218
## 86  0.3024563  0 26.41041 1.2689437 1.4516487 0.14705882 0.1637939
## 87  0.2756091  0 20.92445 1.4813641 1.5843049 0.08888889 0.1952426
## 88  0.1984354  0 29.00511 1.3175280 1.6084602 0.10000000 0.1584127
## 89  0.3612311  0 23.06653 1.3378768 1.4198228 0.14634146 0.1673015
## 90  0.2391533  0 20.68884 1.8402949 1.9491779 0.13513514 0.1618730
## 91  0.2750848  0 22.27727 1.2633329 1.3011376 0.13513514 0.1618074
## 92  0.2634083  0 22.70326 1.2548293 1.3077921 0.13513514 0.1621190
## 93  0.3582754  0 28.67233 0.6869252 0.7535149 0.13750000 0.1549651
## 94  0.3571977  0 29.17184 1.3977346 1.4811949 0.21428571 0.1446855
## 95  0.2094742  0 23.02502 0.9644303 1.0667018 0.12195122 0.1645362
## 96  0.3546810  0 25.16784 1.0903469 1.1321172 0.10975610 0.1633902
## 97  0.3656055  0 24.86673 1.1657459 1.2349088 0.14634146 0.1654051
## 98  0.1932916  0 17.70319 1.3395919 1.8360966 0.08396947 0.1921884
## 99  0.3744142  0 27.17061 0.9968825 1.0471875 0.14634146 0.1582313
## 100 0.3571977  0 29.17184 1.3977346 1.4811949 0.21428571 0.1446855

Cluster evaluation

for (i in 2:10){df_clust_opt <- tsclust(df_lst, type = "partitional", k = i, distance = "dtw", centroid = "pam",seed = 700)
plot(df_clust_opt)}

Seeds evaluation

for (i in 1:10){df_clust_opt_seed <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw", centroid = "pam",seed = i)
plot(df_clust_opt_seed)}

for (i in 11:20){df_clust_opt_seed <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw", centroid = "pam",seed = i)
plot(df_clust_opt_seed)}

for (i in 21:30){df_clust_opt_seed <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw", centroid = "pam",seed = i)
plot(df_clust_opt_seed)}

We using cluster=4

#k4
df_clust_opt_final <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw", centroid = "pam",seed = 700)
plot(df_clust_opt_final)

Plot with detailed states

# Extract cluster assignments
cluster_assignments <- df_clust_opt_final@cluster

# Determine the number of clusters
num_clusters <- max(cluster_assignments)

# Loop through each cluster and print the jurisdictions in it
for (cluster_number in 1:num_clusters) {
  cat("Jurisdictions in Cluster", cluster_number, ":\n")
  
  # Find the indices of jurisdictions in this cluster
  indices_in_cluster <- which(cluster_assignments == cluster_number)
  
  # Print the jurisdictions corresponding to these indices
  print(jurisdiction[indices_in_cluster])
  
  cat("\n") # Add a newline for readability
}
## Jurisdictions in Cluster 1 :
## [1] "Alaska"         "Delaware"       "Illinois"       "Michigan"      
## [5] "New York"       "Oklahoma"       "Oregon"         "South Carolina"
## [9] "West Virginia" 
## 
## Jurisdictions in Cluster 2 :
##  [1] "Connecticut"   "Florida"       "Indiana"       "Iowa"         
##  [5] "Massachusetts" "Minnesota"     "Montana"       "Nebraska"     
##  [9] "New Hampshire" "New Jersey"    "North Dakota"  "Pennsylvania" 
## [13] "South Dakota"  "Texas"         "Utah"          "Virginia"     
## [17] "Wisconsin"     "Wyoming"      
## 
## Jurisdictions in Cluster 3 :
##  [1] "Alabama"      "Arizona"      "Arkansas"     "California"   "Georgia"     
##  [6] "Hawaii"       "Kentucky"     "Louisiana"    "Mississippi"  "Missouri"    
## [11] "Nevada"       "New Mexico"   "Rhode Island" "Tennessee"   
## 
## Jurisdictions in Cluster 4 :
##  [1] "Colorado"       "Idaho"          "Kansas"         "Maine"         
##  [5] "Maryland"       "National"       "North Carolina" "Ohio"          
##  [9] "Vermont"        "Washington"
# Load necessary libraries
library(ggplot2)
library(reshape2)

# Loop through each cluster
for (cluster_number in 1:num_clusters) {
  cat("Plotting jurisdictions in Cluster", cluster_number, ":\n")
  
  # Find the indices of jurisdictions in this cluster
  indices_in_cluster <- which(cluster_assignments == cluster_number)
  
  # Get the names of the jurisdictions in this cluster
  jurisdictions_in_cluster <- jurisdiction[indices_in_cluster]
  
  # Filter the gap_list_year data frame for these jurisdictions
  cluster_data <- df[df$Jurisdiction %in% jurisdictions_in_cluster, ]
  
  # Convert the data to long format for ggplot
  long_df <- melt(cluster_data, id.vars = "Jurisdiction", variable.name = "Year", value.name = "Value")
  
  # Plot
  p <- ggplot(long_df, aes(x = Year, y = Value, group = Jurisdiction, color = Jurisdiction)) +
    geom_line() +
    labs(title = paste("Cluster", cluster_number), x = "Year", y = "Scale Score") +
    theme(legend.position = "right")
  
  print(p)

  #ggsave(paste("cluster_", cluster_number, ".png", sep=""), plot = p)
}
## Plotting jurisdictions in Cluster 1 :

## Plotting jurisdictions in Cluster 2 :

## Plotting jurisdictions in Cluster 3 :

## Plotting jurisdictions in Cluster 4 :